
import requests
from bs4 import BeautifulSoup
import re
import os

def getHtmlUrl(url):
    """Fetch *url* and return the decoded page text.

    Returns the sentinel string "error" on any request failure — kept
    for backward compatibility with existing callers.
    """
    try:
        # Timeout prevents the scraper from hanging forever on a dead host.
        rsp = requests.get(url, timeout=10)
        rsp.raise_for_status()
        # Use the encoding sniffed from the body; the charset in the
        # response headers is often missing or wrong on this site.
        rsp.encoding = rsp.apparent_encoding
        return rsp.text
    except requests.RequestException:
        # Narrowed from a bare `except:` so real bugs (KeyboardInterrupt,
        # NameError, etc.) are no longer silently swallowed.
        return "error"

def getJPG(html):
    """Parse the listing page *html* and download every thumbnail image
    found under div.list > ul into d:/Desktop/Img/.

    Files that already exist on disk are skipped. Per-image failures are
    caught and reported so one bad image does not abort the whole batch.
    """
    soup = BeautifulSoup(html, "html.parser")
    allImg = soup.find('div', class_='list').find('ul').find_all('img')
    saveDir = 'd:/Desktop/Img'
    for img in allImg:
        imgName = img['alt']
        imgUrl = img['src']
        print("图片名称：" + imgName + "图片地址：" + imgUrl)
        imgPath = saveDir + "/" + imgName + ".jpg"
        try:
            # makedirs(exist_ok=True) is race-free, unlike exists()+mkdir().
            os.makedirs(saveDir, exist_ok=True)
            if not os.path.exists(imgPath):
                r = requests.get(imgUrl, timeout=10)
                # Bug fix: without raise_for_status() an HTTP 404/500 error
                # page was silently written to disk as a .jpg file.
                r.raise_for_status()
                with open(imgPath, 'wb') as f:
                    f.write(r.content)
                # Redundant f.close() removed: the with-block closes the file.
                print(imgName + "的图片下载成功")
            else:
                print(imgName + "已存在")
        except Exception as e:
            print("抓取图片失败，原因是：" + str(e))

def getBigJPG(html):
    """Parse a detail page *html* and download its single full-size image
    (div.endpage > p > img) into d:/Desktop/BigImg/.

    Skips the download when the file already exists; any failure is
    caught and reported rather than propagated.
    """
    soup = BeautifulSoup(html, "html.parser")
    bigImg = soup.find('div', class_='endpage').find('p').find('img')
    imgName = bigImg['alt']
    imgUrl = bigImg['src']
    print("图片名称：" + imgName + "图片地址：" + imgUrl)
    saveDir = 'd:/Desktop/BigImg'
    imgPath = saveDir + "/" + imgName + ".jpg"
    try:
        # makedirs(exist_ok=True) is race-free, unlike exists()+mkdir().
        os.makedirs(saveDir, exist_ok=True)
        if not os.path.exists(imgPath):
            r = requests.get(imgUrl, timeout=10)
            # Bug fix: without raise_for_status() an HTTP error page was
            # silently written to disk as a .jpg file.
            r.raise_for_status()
            with open(imgPath, 'wb') as f:
                f.write(r.content)
            # Redundant f.close() removed: the with-block closes the file.
            print(imgName + "的图片下载成功")
        else:
            print(imgName + "已存在")
    except Exception as e:
        print("抓取图片失败，原因是：" + str(e))

def getBigImgUrl(img):
    """Build the absolute detail-page URL from an <a> tag of the listing
    page and return that page's HTML (or "error", as per getHtmlUrl)."""
    detail_url = "http://www.netbian.com" + img['href']
    print(detail_url)
    return getHtmlUrl(detail_url)

def getBigImgHtml(html):
    """Walk the listing page *html*, collect every detail-page link, and
    download the full-size image behind each one."""
    soup = BeautifulSoup(html, 'html.parser')
    # Only site-relative links (href not containing "http") opening in a
    # new tab point at image detail pages.
    link_filter = {'href': re.compile('^((?!http).)*$'), 'target': '_blank'}
    anchors = soup.find('div', class_='list').find('ul').find_all('a', attrs=link_filter)
    for anchor in anchors:
        print("图片标题：" + anchor['title'] + ",图片访问界面地址：" + anchor['href'])
        detail_html = getBigImgUrl(anchor)
        getBigJPG(detail_html)